######################################################################
# Title:  Do Political Finance Reforms Really Reduce Corruption? A Replication Study

# Description: R code to replicate the results in the article: main analysis
#           
# Authors:   Sergiu Lipcean & Casal Bértoa Fernando
# Date:     05.12.2024
####################################################################### 
setwd("set_your_working_directory_that_contains_data")

## Download required packages 

library(tidyverse)
library(fixest)
library(broom)
library(modelsummary)
library(marginaleffects)
library(patchwork)
library(scales)
library(kableExtra)
library(NatParksPalettes)

### Import the data set from the working directory 
pf_data <- readRDS("pfsi_data.rds")

### Create a custom theme 
custom_theme <- 
  theme_light(base_family = "serif") +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_line(linetype = 3),
        legend.position = "none",
        plot.caption.position = "plot",
        plot.caption = element_text(size = 11, hjust = 0),
        axis.text = element_text(size = 11, hjust = 0.5),
        axis.title = element_text(size = 11, hjust = 0.5))

### Set a custom theme for all figures
theme_set(custom_theme)

## Figure 1: "Relationship between minority vs majority status in HGB data and the share of subsidies in party income from alternative studies"

## Disclaimer: Since data used to build Figure 1 is not in open access, I do not make it available by default. However, I can provide it on request, this relates to the variable dpf_share without which you won't be able to replicate Figure 1

pf_data %>%
  drop_na(dpf_share) %>%
  mutate(majority_pubfin = ifelse(majority_pubfin == 0, "No", "Yes")) %>%
  ggplot(aes(x = majority_pubfin, y = dpf_share)) +
  geom_boxplot(aes(colour = majority_pubfin, fill = majority_pubfin), varwidth = TRUE, notch = F) +
  scale_fill_brewer(palette = "Blues", direction = -1) +
  scale_colour_brewer(palette = "Blues", direction = 1) +
  facet_wrap(~author, ncol = 4) + 
  theme(strip.background = element_blank(),
        strip.text = element_text(colour = "grey10",face = "bold", size = 10)) +
  labs(x = "Majority of subsidies in party budgets", y = "Share of subsidies")

##Save figure 1
ggsave("Figure_1.png", height = 2.5, width = 8, dpi=300)
dev.off()

## Figure 2: "Relationship between de facto amount of party organisational and electoral subsidies per vote and their coding in HGB data"
# Panel "A: Organisational subsidy vs HGB coding"
p1 <- pf_data %>%
  mutate(partyorg_pubfin = ifelse(partyorg_pubfin == 0, "No", "Yes")) %>%
  drop_na(dpfv_stat_usd, partyorg_pubfin) %>%
  ggplot(aes(x = partyorg_pubfin, y = dpfv_stat_usd)) +
  geom_jitter(aes(shape = partyorg_pubfin, colour = partyorg_pubfin),width = 0.15, size = 1.5) +
  scale_y_continuous(n.breaks = 7, trans=pseudo_log_trans(base = 10), limits = c(0,9)) +
  scale_shape_manual(values = c(16, 18)) +
  scale_color_manual(values = c("red4", "steelblue4")) +
  theme(legend.position = "none",
        legend.title = element_blank()) +
  labs(title = "A: Organisational subsidy vs HGB coding",x = "Availability of subsidies", y = "Amount per vote (Nominal USD)")

## Panel "B: Electoral subsidy vs HGB coding"
p2 <- pf_data %>%
  mutate(campaigns_pubfin = ifelse(campaigns_pubfin == 0, "No", "Yes")) %>%
  drop_na(dpfv_elect_usd, campaigns_pubfin) %>%
  ggplot(aes(x = campaigns_pubfin, y = dpfv_elect_usd)) +
  geom_jitter(aes(shape = campaigns_pubfin, colour = campaigns_pubfin), width = 0.15, size = 1.5) +
  scale_y_continuous(n.breaks = 7, trans=pseudo_log_trans(base = 10), limits = c(0,9)) +
  scale_shape_manual(values = c(16, 18)) +
  scale_color_manual(values = c("red4", "steelblue4")) +
  theme(legend.position = "none",
        legend.title = element_blank()) +
  labs(title = "B: Electoral subsidy vs HGB coding", x = "Availability of subsidies", y = "")

## Join panels A & B
p1+p2

## Save Figure 2
ggsave("Figure_2.png", height = 4, width = 8, dpi=300)
dev.off()


## Convert data into panel data for analysis  
pf_data_panel <- panel(pf_data, ~Country + Year)

## Fit Two-Way Fixed-Effects models using original model specification in Hummel et al. and their stock-based measure of public funding plus the same model specification using regionally-split samples: period 1900–2015 

twfe_by_region_full <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

#modelsummary(twfe_by_region_full, stars = TRUE)

## Fit Two-Way Fixed-Effects models using original model specification in Hummel et al. and additive-based measure of public funding plus the same model specification using regionally-split samples: period 1900–2015

twfe_by_region_simple <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfin_add_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

#modelsummary(twfe_by_region_simple, stars = TRUE)

## Fit Two-Way Fixed-Effects models using original model specification in Hummel et al. and additive-based measure of public funding and removed duplicated variables plus the same model specification using regionally-split samples: period 1900–2015 

twfe_by_region_short <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfin_add_short_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

#modelsummary(twfe_by_region_short, stars = TRUE)

## Fit Two-Way Fixed-Effects models using original model specification in Hummel et al. and additive-based measure of public funding, removed duplicated variables and re-coded post-communist regimes, plus the same model specification using regionally-split samples: period 1900–2015  

twfe_by_region_recoded <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

#modelsummary(twfe_by_region_recoded, stars = TRUE)

### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI stock 

twfe_by_region_full_df <- twfe_by_region_full %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pubfinindexstock1log_l5") %>%
  mutate(main_estim = twfe_by_region_full$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Stock",
         model_type = "TWFE",
         period = "A: Full data")

### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI additive

twfe_by_region_simple_df <- twfe_by_region_simple %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pubfin_add_l5") %>%
  mutate(main_estim = twfe_by_region_simple$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Additive",
         period = "A: Full data")

### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI Additive: No duplicates

twfe_by_region_short_df <- twfe_by_region_short %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pubfin_add_short_l5") %>%
  mutate(main_estim = twfe_by_region_short$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Additive: No duplicates",
         period = "A: Full data")

### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI Additive: No duplicates & recoded
twfe_by_region_recoded_df <- twfe_by_region_recoded %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pfsi_recoded_l5") %>%
  mutate(main_estim = twfe_by_region_recoded$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "TWFE",
         period = "A: Full data")

## Subset only data for the 1960–2015 period
pf_data_cut <- pf_data %>% filter(Year > 1960)

## Convert to panel data
pf_data_panel_cut <- panel(pf_data_cut, ~Country + Year)

## Fit Two-Way Fixed-Effects models using original model specification in Hummel et al. and their stock-based measure of public funding plus the same model specification using regionally-split samples: period 1960–2015

twfe_by_region_stock_cut <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel_cut, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Western Europe and North America")))

#modelsummary(twfe_by_region_stock_cut, stars = TRUE)

## Fit Two-Way Fixed-Effects models using original model specification in Hummel et al. and additive-based measure of public funding plus the same model specification using regionally-split samples: period 1960–2015

twfe_by_region_simple_cut <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfin_add_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel_cut, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Western Europe and North America")))

#modelsummary(twfe_by_region_simple, stars = TRUE)

## Fit Two-Way Fixed-Effects models using original model specification in Hummel et al. and additive-based measure of public funding and removed duplicated variables plus the same model specification using regionally-split samples: period 1960–2015 

twfe_by_region_short_cut <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfin_add_short_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel_cut, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Western Europe and North America")))

#modelsummary(twfe_by_region_short, stars = TRUE)

## Fit Two-Way Fixed-Effects models using original model specification in Hummel et al. and additive-based measure of public funding, removed duplicated variables and re-coded post-communist regimes, plus the same model specification using regionally-split samples: period 1960–2015 

twfe_by_region_recoded_cut <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel_cut, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Western Europe and North America")))

#modelsummary(twfe_by_region_recoded, stars = TRUE)


### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI stock, Post-1960 data 

twfe_by_region_stock_cut_df <- twfe_by_region_stock_cut %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pubfinindexstock1log_l5") %>%
  mutate(main_estim = twfe_by_region_stock_cut$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Stock",
         model_type = "TWFE",
         period = "B: Post-1960 data")

### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI additive, Post-1960 data 

twfe_by_region_simple_cut_df <- twfe_by_region_simple_cut %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pubfin_add_l5") %>%
  mutate(main_estim = twfe_by_region_simple_cut$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Additive",
         period = "B: Post-1960 data")

### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI Additive: No duplicates, Post-1960 data

twfe_by_region_short_cut_df <- twfe_by_region_short_cut %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pubfin_add_short_l5") %>%
  mutate(main_estim = twfe_by_region_short_cut$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Additive: No duplicates",
         period = "B: Post-1960 data")

### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI Additive: No duplicates & recoded, Post-1960 data

twfe_by_region_recoded_cut_df <- twfe_by_region_recoded_cut %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pfsi_recoded_l5") %>%
  mutate(main_estim = twfe_by_region_recoded_cut$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "TWFE",
         period = "B: Post-1960 data")


## Collect all estimates into a data frame to use for Figure 3
combined_pfsi <- bind_rows(twfe_by_region_full_df, 
                           twfe_by_region_simple_df,
                           twfe_by_region_short_df,
                           twfe_by_region_recoded_df,
                           twfe_by_region_stock_cut_df, 
                           twfe_by_region_simple_cut_df,
                           twfe_by_region_short_cut_df,
                           twfe_by_region_recoded_cut_df) %>%
  mutate(measure_type = factor(measure_type, 
                               levels = c("PFSI Stock", 
                                          "PFSI Additive",
                                          "PFSI Additive: No duplicates",
                                          "PFSI Additive: No duplicates & recoded"))) %>%
  mutate(id = factor(id, levels = c("Latin America & the Caribbean", "Sub-Saharan Africa", "South-East Asia & the Pacific", "Western Europe & North America", "Eastern Europe & Central Asia", "Middle East & North Africa", "Global")))

## Figure 3: "PFSI and corruption by region"

ggplot(combined_pfsi, aes(x = id, y= estimate)) +
  geom_point(aes(colour = measure_type, shape = measure_type), size = 2.5, position = position_dodge(width = 0.7)) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high, colour = measure_type, width = 0), linewidth = 0.4, position = position_dodge(width = 0.7)) +
  geom_errorbar(aes(ymin = conf_low_90, ymax = conf_high_90, colour = measure_type, width = 0), linewidth = 0.8, position = position_dodge(width = 0.7)) +
  scale_shape_manual(values = c(18:15)) +
  geom_hline(yintercept = 0, linetype = 5, color = "grey40") +
  scale_colour_natparks_d("Yosemite") +
  scale_x_discrete(labels = wrap_format(20)) +
  scale_y_continuous(n.breaks = 5) +
  guides(colour = guide_legend(nrow = 2, reverse = T),
         shape = guide_legend(nrow = 2, reverse = T),
         linetype = guide_legend(nrow = 2, reverse = T)) +
  coord_flip() +
  facet_wrap(~period, ncol = 2) +
  theme(axis.text = element_text(size = 12, colour = "grey10"),
        strip.text = element_text(size = 12, colour = "grey10"),
        strip.background = element_blank(),
        legend.position = "top",
        legend.title = element_blank(),
        legend.text = element_text(size = 12),
        legend.margin = margin(t = 0, r = 0, b = -0.5, l = 0, unit = "lines"),
        plot.caption = element_text(size = 11)) +
  labs(x = "", y = "Estimates with 90% and 95% Confidence Intervals")

## Save Figure 3
ggsave("Figure_3.png", height = 5.6, width = 8, dpi=400)
dev.off()

## Fit Country Fixed-Effects models using the stock-based measure of public funding plus the same model specification using regionally-split samples: period 1900–2015

twfe_by_region_country <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))


## Fit Year Fixed-Effects models using the stock-based measure of public funding plus the same model specification using regionally-split samples: period 1900–2015

twfe_by_region_year <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Fit Country Fixed-Effects models using the stock-based measure of public funding plus the same model specification using regionally-split samples: period 1960–2015

twfe_by_region_country_sh <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country, data = subset(pf_data_panel, Year > 1960), vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Fit Year Fixed-Effects models using the stock-based measure of public funding plus the same model specification using regionally-split samples: period 1960–2015

twfe_by_region_year_sh <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Year, data = subset(pf_data_panel, Year > 1960), vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI stock & country fixed-effects - Full data

twfe_by_region_country_df <- twfe_by_region_country %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pubfinindexstock1log_l5") %>%
  mutate(main_estim = twfe_by_region_full$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Stock",
         model_type = "Country FE",
         period = "A: Full data")

### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI stock & year fixed-effects - Full data

twfe_by_region_year_df <- twfe_by_region_year %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pubfinindexstock1log_l5") %>%
  mutate(main_estim = twfe_by_region_full$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Stock",
         model_type = "Year FE",
         period = "A: Full data")

### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI stock & country fixed-effects - Post-1960 data

twfe_by_region_country_sh_df <- twfe_by_region_country_sh %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pubfinindexstock1log_l5") %>%
  mutate(main_estim = twfe_by_region_stock_cut$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Stock",
         model_type = "Country FE",
         period = "B: Post-1960 data")

### Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI stock & year fixed-effects - Post-1960 data

twfe_by_region_year_sh_df <- twfe_by_region_year_sh %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pubfinindexstock1log_l5") %>%
  mutate(main_estim = twfe_by_region_stock_cut$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Stock",
         model_type = "Year FE",
         period = "B: Post-1960 data")


## Fit Country Fixed-Effects models using the additive-based measure of public funding, removed duplicated variables and re-coded post-communist regimes, plus the same model specification using regionally-split samples: period 1900–2015 

recoded_by_region_country <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Fit Year Fixed-Effects models using the additive-based measure of public funding, removed duplicated variables and re-coded post-communist regimes, plus the same model specification using regionally-split samples: period 1900–2015 

recoded_by_region_year <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Fit Country Fixed-Effects models using the additive-based measure of public funding, removed duplicated variables and re-coded post-communist regimes, plus the same model specification using regionally-split samples: period 1960–2015 

recoded_by_region_country_sh <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country, data = subset(pf_data_panel, Year > 1960), vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Fit Year Fixed-Effects models using the additive-based measure of public funding, removed duplicated variables and re-coded post-communist regimes, plus the same model specification using regionally-split samples: period 1960–2015 

recoded_by_region_year_sh <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Year, data = subset(pf_data_panel, Year > 1960), vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI Additive: No duplicates & recoded, country fixed-effects & full data

recoded_by_region_country_df <- recoded_by_region_country %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pfsi_recoded_l5") %>%
  mutate(main_estim = twfe_by_region_recoded$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "Country FE",
         period = "A: Full data")

## Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI Additive: No duplicates & recoded, year fixed-effects & full data

recoded_by_region_year_df <- recoded_by_region_year %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pfsi_recoded_l5") %>%
  mutate(main_estim = twfe_by_region_recoded$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "Year FE",
         period = "A: Full data")

## Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI Additive: No duplicates & recoded, country fixed-effects & Post-1960 data

recoded_by_region_country_sh_df <- recoded_by_region_country_sh %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pfsi_recoded_l5") %>%
  mutate(main_estim = twfe_by_region_recoded_cut$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "Country FE",
         period = "B: Post-1960 data")

## Extract average marginal effects and calculate confidence intervals based on robust std.errors clustered by country and year: PFSI Additive: No duplicates & recoded, year fixed-effects & Post-1960 data

recoded_by_region_year_sh_df <- recoded_by_region_year_sh %>%
  map(., ~tidy(.x, conf.int = TRUE)) %>%
  bind_rows(.id = "id") %>%
  filter(term == "pfsi_recoded_l5") %>%
  mutate(main_estim = twfe_by_region_recoded_cut$Global$coefficients[[1]],
         conf_low_90 = estimate - 1.645*std.error,
         conf_high_90 = estimate + 1.645*std.error,
         measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "Year FE",
         period = "B: Post-1960 data")

## Collect all estimates into a data frame to use for Figure 4
twfe_cfe_tfe <- bind_rows(twfe_by_region_full_df, 
                          twfe_by_region_stock_cut_df, 
                          twfe_by_region_country_df,
                          twfe_by_region_country_sh_df,
                          twfe_by_region_year_df,
                          twfe_by_region_year_sh_df,
                          twfe_by_region_recoded_df,
                          twfe_by_region_recoded_cut_df,
                          recoded_by_region_country_df,
                          recoded_by_region_year_df,
                          recoded_by_region_country_sh_df,
                          recoded_by_region_year_sh_df) %>%
  mutate(model_type = factor(model_type, 
                               levels = c("TWFE", 
                                          "Country FE",
                                          "Year FE")),
         measure_type = factor(measure_type, levels = c("PFSI Stock", "PFSI Additive: No duplicates & recoded"))) %>%
  mutate(id = factor(id, levels = c("Latin America & the Caribbean",    "Sub-Saharan Africa", "South-East Asia & the Pacific", "Western Europe & North America", "Eastern Europe & Central Asia", "Middle East & North Africa", "Global")))

## Figure 4: "PFSI and corruption by model type and PFSI type and region"

ggplot(twfe_cfe_tfe, aes(x = id, y= estimate)) +
  geom_point(aes(colour = model_type, shape = model_type), size = 2.5, position = position_dodge(width = 0.7)) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high, colour = model_type, width = 0), linewidth = 0.4, position = position_dodge(width = 0.7)) +
  geom_errorbar(aes(ymin = conf_low_90, ymax = conf_high_90, colour = model_type, width = 0), linewidth = 0.8, position = position_dodge(width = 0.7)) +
  scale_shape_manual(values = c(17:15)) +
  geom_hline(yintercept = 0, linetype = 5, color = "grey40") +
  scale_colour_natparks_d("Yosemite") +
  scale_x_discrete(labels = wrap_format(20)) +
  scale_y_continuous(n.breaks = 5) +
  guides(colour = guide_legend(nrow = 1),
         shape = guide_legend(nrow = 1),
         linetype = guide_legend(nrow = 1)) +
  coord_flip() +
  facet_grid(measure_type~period) +
  theme(axis.text = element_text(size = 12),
        strip.text = element_text(size = 12, colour = "grey20"),
        strip.background = element_blank(),
        legend.position = "top",
        legend.title = element_blank(),
        legend.text = element_text(size = 12),
        legend.margin = margin(t = 0, r = 0, b = -0.5, l = 0, unit = "lines"),
        plot.caption = element_text(size = 11)) +
  labs(x = "", y = "Estimates with 90% and 95% Confidence Intervals")

## Save Figure 4
ggsave("Figure_4.png", height = 8, width = 8, dpi=400)
dev.off()

